---
title: Clean Data - YouGov Survey
author: Weifang Xu, Taylor Chewning, and Qing Wang
date: August 6, 2024
output: pdf_document
fontsize: 11pt
header-includes:
  \usepackage[T1]{fontenc}
  \usepackage[utf8]{inputenc}
  \usepackage{newpxtext,newpxmath}
---

```{r setup, include=FALSE}
# Set the default chunk option echo = TRUE for all subsequent chunks.
knitr::opts_chunk$set(echo = TRUE)
```


```{r}
## Set up the session: data location, libraries, and the raw survey file

# Root folder of the replication materials. Input paths are built from this
# value with file.path() instead of calling setwd(), so the working directory
# of the R session (and of knitr) is left untouched. The global environment is
# likewise not wiped, so running this chunk never deletes the caller's objects.
# Edit this one line when running on another machine.
# NOTE: the commented-out write_dta() calls further down use paths relative to
# the working directory; wrap them in file.path(data_dir, ...) if re-enabled.
data_dir <- "/Users/qingwang/Downloads/Data Replication"

# load the libraries
library(tidyverse)
library(haven)

# import the original dataset
fsu <- read_csv(file.path(data_dir, "YouGov", "data_YouGov.csv"))

# Build the pre-election analysis data from the raw file: outcome variables,
# experimental-condition indicators, demographics, and income.
df_ces <- fsu %>%
  mutate(
    # --- outcome and experimental conditions -------------------------------
    # binary DV: 100 = favor (alliance_DV1 above 3), 0 otherwise
    attack = if_else(alliance_DV1 > 3, 100, 0),
    # continuous DV: 1 maps to 0 and every step adds 25;
    # higher values = more support for attack
    attack_cont = alliance_DV1 * 25 - 25,
    # 1 = alliance (exp_4 above 2)
    alliance = if_else(exp_4 > 2, 1, 0),
    # 1 = violate (exp_4 equal to 1 or 3); a missing exp_4 stays missing
    hmrts = as.numeric(exp_4 == 1 | exp_4 == 3),

    # --- demographics -------------------------------------------------------
    male = as.numeric(sex == 1), # 1 = man
    # four education levels:
    # 1 = HS and under, 2 = some college, 3 = 4-year college, 4 = advanced
    edu4 = case_when(
      educ %in% c(1, 2) ~ 1,
      educ %in% c(3, 4) ~ 2,
      educ == 5 ~ 3,
      educ == 6 ~ 4
    ),
    edu4 = (edu4 - 1) / 3, # rescale education to run from 0 to 1
    white = as.numeric(race == 1),
    age = 2022 - birthyr,
    # collapse age into categories to match PureSpectrum; each category is
    # represented by its average, or by its minimum for the open-ended top one
    age_cat = case_when(
      age <= 29 ~ 23.5,
      between(age, 30, 39) ~ 34.5,
      between(age, 40, 49) ~ 44.5,
      between(age, 50, 59) ~ 54.5,
      between(age, 60, 69) ~ 64.5,
      age >= 70 ~ 70
    ),

    # --- income ---------------------------------------------------------------
    # collapse income categories to match PureSpectrum (kept as a string code)
    income_cat = case_when(
      income %in% 1:3 ~ "1",
      income %in% 4:7 ~ "2",
      income %in% 8:9 ~ "3",
      income %in% 10:12 ~ "4",
      income %in% 13:16 ~ "5"
    ),
    # dollar value assigned to each collapsed income category
    # (average of the category, or its min / max)
    inc = case_when(
      income_cat == "1" ~ 30000,
      income_cat == "2" ~ 50000,
      income_cat == "3" ~ 85000,
      income_cat == "4" ~ 150000,
      income_cat == "5" ~ 200000
    ),
    # convert the inc variable unit ($ to 10k$)
    inc_10k = inc / 10000
  )


# Mediator variables for the pre-election wave. Each index is the mean of its
# items, rescaled so that an item mean of 1 maps to 0 and each step adds 25.
df_ces <- df_ces %>%
  mutate(
    threat = (alliance_DV4_1 + alliance_DV4_2 +
      alliance_DV4_3 + alliance_DV4_4) / 4 * 25 - 25,
    success = (alliance_DV5_1 + alliance_DV5_2) / 2 * 25 - 25,
    cost = (alliance_DV5_3 + alliance_DV5_4 +
      alliance_DV5_5 + alliance_DV5_6) / 4 * 25 - 25,
    # 100 = US has moral obligation
    oblig = case_when(
      alliance_DV2 == 1 ~ 0,
      alliance_DV2 == 2 ~ 50,
      alliance_DV2 == 3 ~ 100
    ),
    # 100 = morally wrong for US to attack
    immoral = case_when(
      alliance_DV3 == 1 ~ 100,
      alliance_DV3 == 2 ~ 0
    ),
    # combined morality score: 100 = moral to attack, 0 = immoral to attack
    moral = (oblig - immoral + 100) / 2
  )

# quick look at the resulting columns and their types
glimpse(df_ces)
# optionally save the cleaned pre-election data as a Stata file
# haven::write_dta(df_ces, "YouGov/YouGov_clean.dta")

### construct dataset for the post-election wave survey data
# Starts from df_ces, so every pre-election column is carried over. The
# post-wave DVs and age_post are added as new columns; male, edu4, white and
# age_cat are overwritten in place.
df_ces_post <- df_ces %>%
  mutate(
    # binary post-wave DV: 100 = favor (alliance_DV1_post above 3), 0 otherwise
    attack_post = ifelse(alliance_DV1_post > 3, 100, 0),
    # continuous post-wave DV: 1 maps to 0 and every step adds 25;
    # higher values = more support for attack
    attack_cont_post = (alliance_DV1_post * 25) - 25,
    male = ifelse(sex_post == 1, 1, 0), # 1 = man, from the post-wave sex item
    # education and race are taken from the pre-election columns educ / race
    # 1 = HS and under, 2 = some college, 3 = 4-year college, 4 = advanced
    edu4 = case_when(
      educ == 1 | educ == 2 ~ 1,
      educ == 3 | educ == 4 ~ 2,
      educ == 5 ~ 3,
      educ == 6 ~ 4
    ),
    edu4 = (edu4 - 1) / 3, # rescale education to run from 0 to 1
    white = ifelse(race == 1, 1, 0),
    age_post = 2022 - birthyr_post,
    # Age categories are bucketed from age_post (the post-wave age), consistent
    # with male being derived from sex_post; respondents without a
    # birthyr_post value therefore get a missing age_cat in this dataset.
    # Each category is represented by its average, or by its minimum for the
    # open-ended top category.
    age_cat = case_when(
      age_post <= 29 ~ 23.5,
      age_post >= 30 & age_post <= 39 ~ 34.5,
      age_post >= 40 & age_post <= 49 ~ 44.5,
      age_post >= 50 & age_post <= 59 ~ 54.5,
      age_post >= 60 & age_post <= 69 ~ 64.5,
      age_post >= 70 ~ 70
    )
  )

# Mediator variables for the post-election wave. These overwrite the
# pre-election threat / success / cost / oblig / immoral / moral columns in
# df_ces_post with values built from the *_post items. Each index is the mean
# of its items, rescaled so that an item mean of 1 maps to 0 and each step
# adds 25.

df_ces_post <- df_ces_post %>%
  mutate(
    threat = (alliance_DV4_1_post + alliance_DV4_2_post +
      alliance_DV4_3_post + alliance_DV4_4_post) / 4 * 25 - 25,
    success = (alliance_DV5_1_post + alliance_DV5_2_post) / 2 * 25 - 25,
    cost = (alliance_DV5_3_post + alliance_DV5_4_post +
      alliance_DV5_5_post + alliance_DV5_6_post) / 4 * 25 - 25,
    # 100 = US has moral obligation
    oblig = case_when(
      alliance_DV2_post == 1 ~ 0,
      alliance_DV2_post == 2 ~ 50,
      alliance_DV2_post == 3 ~ 100
    ),
    # 100 = morally wrong for US to attack
    immoral = case_when(
      alliance_DV3_post == 1 ~ 100,
      alliance_DV3_post == 2 ~ 0
    ),
    # combined morality score: 100 = moral to attack, 0 = immoral to attack
    moral = (oblig - immoral + 100) / 2
  )

# quick look at the resulting columns and their types
glimpse(df_ces_post)

# optionally save the cleaned post-election data as a Stata file
# haven::write_dta(df_ces_post, "YouGov/YouGov_post_clean.dta")

```

